library(tidyverse)
library(lubridate)

fix_emperors <- function(data) {
  data %>% 
    mutate(
      birth = case_when(
        index %in% c(1, 2, 4, 6) ~ update(birth, year = -year(birth)),
        TRUE                     ~ birth
      ),
      reign_start = case_when(
        index == 1 ~ update(reign_start, year = -year(reign_start)),
        TRUE       ~ reign_start
      )
    )
}

Roman emperors

The first exercise uses a dataset about roman emperors from the tidytuesday project (link). You can import it with:

raw_emperors <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-08-13/emperors.csv")
emperors <- fix_emperors(raw_emperors)

Here are a couple of questions to answer. Decide for yourselves if a particular question is best answered using a visualization, a table or a simple sentence.

How they rise

rising_to_power <- emperors %>% 
  count(rise, sort = TRUE)
rising_to_power %>% 
  mutate(rise = fct_reorder(rise, desc(n))) %>% 
  ggplot(aes(n, rise, fill = n)) +
  geom_col() +
  geom_text(aes(label = n), hjust = 1, color = "white") +
  guides(fill = "none") +
  theme_classic()

rising_to_power %>% 
  mutate(rise = fct_reorder(rise, desc(n))) %>% 
  ggplot(aes(rise, n, fill = n)) +
  geom_col() +
  geom_text(aes(label = n), hjust = 0.5, vjust = 0, color = "black",
            fontface = "bold") +
  guides(fill = "none") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

rising_to_power %>% 
  mutate(
    rise = fct_reorder(rise, desc(n))
    ) %>% 
  arrange(n) %>% 
  mutate(text_position = cumsum(n),
         text_position = text_position - c(0, diff(text_position)) / 2) %>% 
  ggplot(aes(x = 1, y = n, fill = rise)) +
  geom_col(width = 0.4) +
  geom_text(aes(label = n, y = text_position)) +
  coord_polar(theta = "y") +
  theme_void() +
  lims(x = c(0, 2))

How they fall

emperors %>% 
  count(cause, killer, sort = TRUE) %>% 
  ggplot(aes(cause, n, fill = killer)) +
  geom_col() +
  fishualize::scale_fill_fish_d()

most_common_cause <- emperors %>% 
  count(cause, sort = TRUE) %>% 
  pull(cause) %>% 
  head(1)

The most common cause was Assassination.

Dynamic Dynasties doing their thing

emperors %>% 
  count(dynasty, sort = TRUE)
## # A tibble: 8 x 2
##   dynasty            n
##   <chr>          <int>
## 1 Gordian           22
## 2 Constantinian     15
## 3 Severan            8
## 4 Nerva-Antonine     7
## 5 Flavian            6
## 6 Julio-Claudian     5
## 7 Valentinian        4
## 8 Theodosian         1
emperors %>% 
  mutate(reign = reign_end - reign_start) %>% 
  group_by(dynasty) %>% 
  summarise(
    top_emperor_reign = max(reign),
    romes_top_emperor = paste(name[reign == top_emperor_reign], collapse = " and "),
    reign = sum(reign)
  )
## # A tibble: 8 x 4
##   dynasty        top_emperor_reign romes_top_emperor     reign     
##   <chr>          <drtn>            <chr>                 <drtn>    
## 1 Constantinian  11259 days        Constantine the Great 54307 days
## 2 Flavian         5483 days        Domitian              10326 days
## 3 Gordian         5449 days        Gallienus             21377 days
## 4 Julio-Claudian 14825 days        Augustus              34445 days
## 5 Nerva-Antonine  8276 days        Antonius Pius         39263 days
## 6 Severan         7036 days        Caracalla             21363 days
## 7 Theodosian      5860 days        Theodosius I           5860 days
## 8 Valentinian     6024 days        Valentinian II        21418 days
mean_lifetime <- emperors %>% 
  mutate(life = death - birth) %>% 
  group_by(dynasty) %>% 
  summarise(life = mean(life, na.rm = TRUE))



new_emperors <- emperors %>% 
  mutate(life = death - birth)

lifetimes_plot <-
  new_emperors %>% 
  ggplot(aes(dynasty, life)) +
  geom_jitter(aes(color = cause, label = name), width = 0.1) +
  geom_point(data = mean_lifetime, color = "red")

plotly::ggplotly(lifetimes_plot)
emperors %>% 
  mutate(life = death - birth) %>%
  ggplot(aes(dynasty, life)) +
  geom_boxplot()

Dairy Products in the US

Another dataset (link) concerns dairy product consumption per person in the US across a number of years. Load it with

raw_dairy <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-29/milk_products_facts.csv")

dairy <- raw_dairy %>% 
  pivot_longer(-year, names_to = "product",
               values_to = "kg_pp") %>% 
  mutate(kg_pp = kg_pp * 0.4535924)
dairy %>% 
  ggplot(aes(year, kg_pp, color = product)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ product, scales = "free") + 
  guides(color = "none")